-- altera vhdl_input_version vhdl_2008
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library lpm;
USE lpm.lpm_components.all;

LIBRARY altera_mf;
USE altera_mf.altera_mf_components.all;

use work.fifo_pkg.all;

-- Avalon memory-mapped slave that computes square roots.
--
-- Register map as implemented by architecture rtl:
--   address 0, write : enqueue writedata as a new square-root request
--   address 0, read  : status word, bit 0 = result FIFO empty, other bits '0'
--   address 1, read  : head of the result FIFO; the read also pops the entry
--                      when the FIFO is not empty
entity avalon_mm_sqrt is
	port (
		-- clock and asynchronous, active-low reset
		clk       : in  std_logic;
		res_n     : in  std_logic;

		-- Avalon memory-mapped slave interface
		address   : in  std_logic_vector(0 downto 0);
		write     : in  std_logic;
		read      : in  std_logic;
		writedata : in  std_logic_vector(31 downto 0);
		readdata  : out std_logic_vector(31 downto 0)
	);
end entity avalon_mm_sqrt;


-- RTL implementation of the Avalon-MM square-root slave.
--
-- A write to address 0 feeds writedata (padded with 16 low zero bits) into
-- the clocked alt_sqrt core. A shift register delays the write strobe by
-- SQRT_LATENCY clock cycles; when the strobe reaches the top bit, the
-- zero-extended root is pushed into a first-word-fall-through FIFO.
-- Reads from address 1 return and pop the FIFO head; reads from address 0
-- return the FIFO empty flag in bit 0.
architecture rtl of avalon_mm_sqrt is

	-- Number of entries in the result FIFO.
	constant CAPACITY : integer := 64;

	-- Number of clock cycles a request strobe is delayed before the result is
	-- written to the FIFO.
	-- NOTE(review): presumably equal to the pipeline depth configured for the
	-- alt_sqrt IP; that configuration is not visible here - confirm.
	constant SQRT_LATENCY : positive := 16;

	constant SHORT_ZERO : std_logic_vector(15 downto 0) := (others => '0');
	constant LONG_ZERO : std_logic_vector(47 downto 0) := (others => '0');

	component alt_sqrt
		port (
			clk		: in std_logic;
			radical		: in std_logic_vector(47 downto 0);
			q		: out std_logic_vector(23 downto 0);
			remainder	: out std_logic_vector(24 downto 0)
		);
	end component;

	-- Delay line for the request strobe (bit 0 = newest request).
	signal reg_shift_request : std_logic_vector(SQRT_LATENCY - 1 downto 0) := (others => '0');
	signal reg_next_shift_request : std_logic_vector(SQRT_LATENCY - 1 downto 0);

	-- High for one cycle when a delayed request leaves the delay line.
	signal alt_sqrt_done : std_logic := '0';

	signal fifo_in : fifo_in_t := FIFO_IN_NOP;
	signal fifo_out : fifo_out_t;

	-- Groups the two outputs of the alt_sqrt core.
	type alt_sqrt_out_t is
	record
		q : std_logic_vector(23 downto 0);
		remainder : std_logic_vector(24 downto 0);
	end record;

	signal alt_sqrt_in_radical : std_logic_vector(47 downto 0) := LONG_ZERO;
	signal alt_sqrt_out : alt_sqrt_out_t;
	-- Initialised with a local aggregate so the declaration does not depend
	-- on a ZERO constant being exported by another package.
	signal alt_sqrt_result : std_logic_vector(31 downto 0) := (others => '0');

begin

	-- Result queue; cleared asynchronously while res_n is low.
	alt_fwft_fifo_inst : entity work.alt_fwft_fifo
	generic map (
		DATA_WIDTH => 32,
		NUM_ELEMENTS => CAPACITY
	)
	port map (
		aclr => not res_n,
		clock => clk,
		data => fifo_in.data,
		rdreq => fifo_in.rdreq,
		wrreq => fifo_in.wrreq,
		empty => fifo_out.empty,
		full => fifo_out.full,
		q => fifo_out.q
	);

	-- Square-root core; the remainder output is connected but not used.
	alt_sqrt_inst : component alt_sqrt
	port map (
		clk => clk,
		radical	=> alt_sqrt_in_radical,
		q => alt_sqrt_out.q,
		remainder => alt_sqrt_out.remainder
	);

	-- address 1: FIFO head; address 0: status word with bit 0 = FIFO empty.
	readdata <= fifo_out.q when address(0) else (0 => fifo_out.empty, others => '0');

	-- The request strobe has travelled through the whole delay line.
	alt_sqrt_done <= reg_shift_request(reg_shift_request'high);

	-- Never push into a full FIFO (the result is dropped in that case),
	-- mirroring the empty guard on rdreq below.
	fifo_in.wrreq <= alt_sqrt_done and not fifo_out.full;
	fifo_in.data <= alt_sqrt_result;
	-- Pop only on a read of address 1 while an entry is available.
	fifo_in.rdreq <= read and address(0) and not fifo_out.empty;

	-- State register for the request delay line.
	sync : process(clk, res_n)
	begin
		if res_n = '0' then
			-- asynchronous reset: no request in flight
			reg_shift_request <= (others => '0');
		elsif rising_edge(clk) then
			-- register transfer
			reg_shift_request <= reg_next_shift_request;
		end if;
	end process;

	-- Next-state and datapath logic (purely combinational).
	async : process(all)
	begin
		-- a write to address 0 enters the delay line at bit 0 ...
		reg_next_shift_request(0) <= (not address(0)) and write;
		-- ... and every older request moves up by one position per clock
		reg_next_shift_request(reg_shift_request'high downto 1) <= reg_shift_request(reg_shift_request'high - 1 downto 0);

		-- place writedata in the upper 32 bits of the 48-bit radical
		-- (i.e. shift it left by 16 bits)
		alt_sqrt_in_radical <= writedata & SHORT_ZERO;
		-- zero-extend the 24-bit root to the 32-bit FIFO word
		alt_sqrt_result <= x"00" & alt_sqrt_out.q;
	end process;

end architecture;

